Abstract
This notebook is a template for evaluating your forecaster against submissions from COVIDhub. After inputting a set of parameters (forecasters, COVID signals, etc), the templates yields a comprehensive report on the predictions of COVID forecasters as well as their performance compared to the ground truth.
\[\\[.5in]\]
Every week, forecasters submit their predictions to COVID-19 ForecastHub. An AWS bucket that contains the estimates of a handful of signals (e.g., COVID death, cases, hospitalization, etc). Furthermore, the AWS server stores an array of evaluation metrics of these forecasts (e.g., Absolute Error, Weighted Interval Score). Alternatively, the data can be retrieved from covidcast and covideval APIs.
To promote the flexibility to replicate the report, the data used in this report can be easily downloaded as a CSV file. By doing so, the user can generate customized plots or even include their own forecaster.
aheads <- as.numeric(params$weeks)
# if(params$outcome=="cases") {
# #This is for the cases:
# url_case <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_state_cases.rds"
# download.file(url_case, "eval_cases.RDS") # download to disk
# scores <- readRDS(paste0(here(), "/eval_cases.RDS"))
# scores <- subset(scores, forecaster %in% params$forecasters)
# }
# if(params$outcome=="deaths"){
url_deaths <- "https://forecast-eval.s3.us-east-2.amazonaws.com/score_cards_state_deaths.rds"
download.file(url_deaths, "eval_deaths.RDS") # download to disk
scores <- readRDS(paste0(here(), "/eval_deaths.RDS"))
scores <- subset(scores, forecaster %in% params$forecasters)
# }
primary_forecaster <- params$primary_forecaster
our_pred_dates <-
scores %>%
filter(forecaster == "CMU-TimeSeries")
our_pred_dates <- unique(our_pred_dates$forecast_date)
n_dates <- length(our_pred_dates)
forecast_dates <- our_pred_dates[n_dates- 2 *5:2]
scores$forecast_date <-
if_else(scores$forecaster %in% c("Karlen-pypm", "CU-select"), scores$forecast_date + 1, scores$forecast_date)
scores <- subset(scores, forecast_date %in% forecast_dates & ahead <= aheads)
results <- intersect_averagers(scores, c("forecaster"), c("forecast_date", "geo_value")) %>%
select(c("ahead", "geo_value", "forecaster","forecast_date", "data_source", "signal","target_end_date","incidence_period","actual","wis","ae","cov_80"))
results %>%
group_by(forecast_date) %>%
summarise(n_distinct(geo_value))
results %>%
download_this(
output_name = "results",
output_extension = ".csv",
button_label = "Download Predictions Evaluation",
button_type = "success",
has_icon = TRUE,
csv2 = FALSE,
icon = "fa fa-save"
)
NOTE: Results are based on the following numbers of common locations
By Weeks Ahead
weeks.label = c("1 week ahead", "2 weeks ahead", "3 weeks ahead", "4 weeks ahead")
names(weeks.label) = c(1, 2, 3, 4)
subtitle = sprintf("Forecasts made over %s to %s",
format(min(forecast_dates), "%B %d, %Y"),
format(max(forecast_dates), "%B %d, %Y"))
plot_ae <-
plot_canonical(results,
x = "ahead",
y = "ae",
aggr = mean) +
labs(title = subtitle, x= "Weeks Ahead", y = "Mean AE",color='Forecasters') +
# geom_line(aes(linetype=forecaster, color=forecaster)) +
geom_point(aes(color=forecaster, text=sprintf("Weeks Ahead: %s<br>Average Error: %s <br>Forecaster: %s",
ahead,
round(ae, digits=2),
color)),
alpha = 0.05) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_y_log10()
if (params$colorblind_palette) {
plot_ae <- plot_ae +
scale_color_viridis_d()
}
ggplotly(plot_ae, tooltip="text", width=1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
By Forecast Dates
plot_wis <-
plot_canonical(results,
x = "forecast_date",
y = "wis",
aggr = mean,
grp_vars = c("forecaster","ahead"),
facet_rows = "ahead") +
labs(title = subtitle,
x = "Forecast Dates",
y = "Mean WIS",
color = "Forecasters") +
geom_point(aes(text=sprintf("Forecast Date: %s<br>Mean WIS: %s <br>Forecaster: %s",
forecast_date,
round(wis, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead=weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
scale_y_log10()
if (params$colorblind_palette) {
plot_wis <- plot_wis +
scale_color_viridis_d()
}
ggplotly(plot_wis, tooltip="text", height=800, width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
By Forecast Dates
plot_cov80 <-
plot_canonical(results,
x = "forecast_date",
y = "cov_80",
aggr = mean,
grp_vars = c("forecaster","ahead"),
facet_rows = "ahead") +
labs(title = subtitle, x= "Forecast date", y = "Mean Coverage 80", color='Forecasters') +
geom_point(aes(text = sprintf("Forecast Date: %s<br>Coverage: %s <br>Forecaster: %s",
forecast_date,
round(cov_80, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = .8), )
if (params$colorblind_palette) {
plot_cov80 <- plot_cov80 +
scale_color_viridis_d()
}
ggplotly(plot_cov80, tooltip="text", height=800, width=1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
Relative to baseline; scale first then take the geometric mean, ignoring a few 0’s.
geom_mean <- function(x) prod(x)^(1/length(x))
#geom_mean <- exp(mean(log((x+1)/(y+1)))) #still need to figure this out
mean_wis <-
plot_canonical(results %>%
filter(wis > 0),
x = "ahead",
y = "wis",
aggr = geom_mean,
base_forecaster = "COVIDhub-baseline",
scale_before_aggr = TRUE) +
labs(title = subtitle,
x = "Weeks Ahead",
y = "Geometric mean relative WIS",
color = "Forecasters") +
geom_point(aes(text = sprintf("Weeks Ahead: %s<br>WIS: %s <br>Forecaster: %s",
ahead,
round(wis, digits = 2),
color)),
alpha = 0.05) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = 1))
if (params$colorblind_palette) {
mean_wis <- mean_wis +
scale_color_viridis_d()
}
ggplotly(mean_wis, tooltip="text", width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
mean_wis_forecast_date <-
plot_canonical(results %>%
filter(wis > 0),
x = "forecast_date",
y = "wis",
aggr = geom_mean,
facet_rows = "ahead",
grp_vars = c("forecaster", "ahead"),
base_forecaster = "COVIDhub-baseline",
scale_before_aggr = TRUE) +
theme(legend.position = "bottom") +
labs(title = subtitle,
x = "Forecast date",
y = "Geometric mean relative WIS",
color = "Forecasters") +
geom_point(aes(text = sprintf("Forecast Date: %s<br>WIS: %s <br>Forecaster: %s",
forecast_date,
round(wis, digits = 2),
color)),
alpha = 0.05) +
facet_wrap(~ahead, nrow = 4, labeller = labeller(ahead = weeks.label)) +
theme(strip.background = element_rect(fill = "white")) +
theme_minimal() +
theme(plot.title = element_text(hjust = "center")) +
geom_line(mapping = aes(y = 1))
if (params$colorblind_palette) {
mean_wis_forecast_date <- mean_wis_forecast_date +
scale_color_viridis_d()
}
ggplotly(mean_wis_forecast_date, tooltip = "text", height=800, width= 1000) %>%
layout(hoverlabel = list(bgcolor = "white"))
mean score over forecast dates and aheads Note that the results are scaled by population.
library(sf)
maps <- results %>%
group_by(geo_value, forecaster) %>%
summarise(across(wis:cov_80, mean)) %>%
left_join(animalia::state_population, by = "geo_value") %>%
mutate(across(wis:cov_80, ~ .x / population * 1e5)) %>%
pivot_longer(wis:cov_80, names_to = "score") %>%
group_by(score) %>%
mutate(time_value = Sys.Date(),
r = max(value)) %>%
group_by(forecaster, .add = TRUE) %>%
group_split()
# for county prediction, set geo_type = "county"
maps <- purrr::map(maps,
~as.covidcast_signal(
.x, signal = .x$score[1],
data_source = .x$forecaster[1],
geo_type = "state"))
maps <- purrr::map(maps,
~plot(.x,
choro_col = scales::viridis_pal()(3),
range = c(0,.x$r[1])))
nfcasts <- length(unique(results$forecaster))
# original code
cowplot::plot_grid(plotlist = maps[1:nfcasts], ncol = 3)
cowplot::plot_grid(plotlist = maps[(nfcasts+1):(nfcasts*2)], ncol = 3)
cowplot::plot_grid(plotlist = maps[((nfcasts*2)+1):length(maps)], ncol = 3)
options(timeout=300)
url4 <- "https://forecast-eval.s3.us-east-2.amazonaws.com/predictions_cards.rds"
download.file(url4, "predictions.RDS") # download to disk
state_predictions <- readRDS(paste0(here(), "/predictions.RDS"))
# for county prediction, set geo_type = "county"
state.label = c(state.name, "Washington D.C.", "Porto Rico")
names(state.label) = c(tolower(state.abb), "dc", "pr")
# trajectory plots for selected forecaster
pd <- evalcast:::setup_plot_trajectory(
state_predictions %>% filter(forecaster=="CMU-TimeSeries" & forecast_date %in% forecast_dates),
intervals = 0.8,
geo_type = "state",
start_day = min(forecast_dates) - 60)
g <- ggplot(pd$truth_df, mapping = aes(x = target_end_date))
# build the fan
g <- g + geom_ribbon(
data = pd$quantiles_df %>%
mutate(upper = round(upper, 2),
lower = round(lower, 2)),
mapping = aes(ymin = lower,
ymax = upper,
fill = forecaster,
group = interaction(forecaster, forecast_date)),
alpha = .1) +
scale_fill_viridis_d(begin=.15, end=.85)
# line layer
g <- g +
#geom_line(aes(y = .data$value.y), color = "#3182BD") + # corrected
geom_line(aes(y = value), size = .5) + # reported
geom_line(data = pd$points_df,
mapping = aes(y = value,
color = forecaster,
group = interaction(forecaster, forecast_date)),
size = .5) +
geom_point(aes(y = value), size = 1) + # reported gets dots
geom_point(data = pd$points_df %>%
mutate(value = round(value, 2)),
mapping = aes(y = value, color = forecaster),
size = 1) +
scale_color_viridis_d(begin=.15, end=.85)
states <- g +
facet_wrap(~geo_value,
scales = "free_y",
ncol = 3,
labeller = labeller(geo_value = state.label)) +
labs(x = "", y = "") +
theme_bw() +
theme(legend.position = "none", strip.background = element_rect(fill = "white"))
ggplotly(states, height=5000, width= 1000)